/* Remove previously mapped page if it was present. */
if (prev_mfn && mfn_valid(prev_mfn)) {
- if (IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)))
+ if (is_xen_heap_frame(mfn_to_page(prev_mfn)))
/* Xen heap frames are simply unhooked from this phys slot. */
guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
else
obj-y += string.o
obj-y += sysctl.o
obj-y += time.o
-obj-y += trampoline.o
obj-y += traps.o
obj-y += usercopy.o
obj-y += x86_emulate.o
./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
`$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
-$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds
+ALL_OBJS := $(BASEDIR)/arch/x86/boot/built_in.o $(ALL_OBJS)
+
+$(TARGET)-syms: $(ALL_OBJS) xen.lds
$(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/common/symbols-dummy.o
- $(LD) $(LDFLAGS) -T xen.lds -N \
- boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
+ $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
$(BASEDIR)/common/symbols-dummy.o -o $(@D)/.$(@F).0
$(NM) -n $(@D)/.$(@F).0 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).0.S
$(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).0.o
- $(LD) $(LDFLAGS) -T xen.lds -N \
- boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
+ $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
$(@D)/.$(@F).0.o -o $(@D)/.$(@F).1
$(NM) -n $(@D)/.$(@F).1 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).1.S
$(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).1.o
- $(LD) $(LDFLAGS) -T xen.lds -N \
- boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
+ $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
$(@D)/.$(@F).1.o -o $@
rm -f $(@D)/.$(@F).[0-9]*
--- /dev/null
+obj-y += head.o
+
+head.o: head.S trampoline.S $(TARGET_SUBARCH).S
--- /dev/null
+#include <xen/config.h>
+#include <xen/multiboot.h>
+#include <public/xen.h>
+#include <asm/asm_defns.h>
+#include <asm/desc.h>
+#include <asm/page.h>
+#include <asm/msr.h>
+
+ .text
+ .code32
+
+#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START)
+#define SYM_TRAMP_PHYS(sym) ((sym) - trampoline_start + BOOT_TRAMPOLINE)
+
+#define TRAMP_CS32 0x0008
+#define TRAMP_CS64 0x0010
+#define TRAMP_DS 0x0018
+
+ENTRY(start)
+ jmp __start
+
+ .align 4
+/*** MULTIBOOT HEADER ***/
+#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
+ MULTIBOOT_HEADER_WANT_MEMORY)
+ /* Magic number indicating a Multiboot header. */
+ .long MULTIBOOT_HEADER_MAGIC
+ /* Flags to bootloader (see Multiboot spec). */
+ .long MULTIBOOT_HEADER_FLAGS
+ /* Checksum: must be the negated sum of the first two fields. */
+ .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
+
+.Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
+.Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
+
+bad_cpu:
+ mov $(SYM_PHYS(.Lbad_cpu_msg)),%esi # Error message
+ jmp print_err
+not_multiboot:
+ mov $(SYM_PHYS(.Lbad_ldr_msg)),%esi # Error message
+print_err:
+ mov $0xB8000,%edi # VGA framebuffer
+1: mov (%esi),%bl
+ test %bl,%bl # Terminate on '\0' sentinel
+2: je 2b
+ mov $0x3f8+5,%dx # UART Line Status Register
+3: in %dx,%al
+ test $0x20,%al # Test THR Empty flag
+ je 3b
+ mov $0x3f8+0,%dx # UART Transmit Holding Register
+ mov %bl,%al
+ out %al,%dx # Send a character over the serial line
+ movsb # Write a character to the VGA framebuffer
+ mov $7,%al
+ stosb # Write an attribute to the VGA framebuffer
+ jmp 1b
+
+gdt_boot_descr:
+ .word 4*8-1
+ .long SYM_PHYS(trampoline_gdt)
+
+__start:
+ cld
+ cli
+
+ /* Initialise GDT and basic data segments. */
+ lgdt %cs:SYM_PHYS(gdt_boot_descr)
+ mov $TRAMP_DS,%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+
+ /* Check for Multiboot bootloader */
+ cmp $0x2BADB002,%eax
+ jne not_multiboot
+
+ /* Save the Multiboot info structure for later use. */
+ mov %ebx,SYM_PHYS(multiboot_ptr)
+
+ /* Initialize BSS (no nasty surprises!) */
+ mov $SYM_PHYS(__bss_start),%edi
+ mov $SYM_PHYS(_end),%ecx
+ sub %edi,%ecx
+ xor %eax,%eax
+ rep stosb
+
+ /* Interrogate CPU extended features via CPUID. */
+ mov $0x80000000,%eax
+ cpuid
+ xor %edx,%edx
+ cmp $0x80000000,%eax # any function > 0x80000000?
+ jbe 1f
+ mov $0x80000001,%eax
+ cpuid
+1: mov %edx,SYM_PHYS(cpuid_ext_features)
+
+#if defined(__x86_64__)
+ /* Check for availability of long mode. */
+ bt $29,%edx
+ jnc bad_cpu
+ /* Initialise L2 identity-map and xen page table entries (16MB). */
+ mov $SYM_PHYS(l2_identmap),%edi
+ mov $SYM_PHYS(l2_xenmap),%esi
+ mov $0x1e3,%eax /* PRESENT+RW+A+D+2MB+GLOBAL */
+ mov $8,%ecx
+1: mov %eax,(%edi)
+ add $8,%edi
+ mov %eax,(%esi)
+ add $8,%esi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ loop 1b
+ /* Initialise L3 identity-map page directory entries. */
+ mov $SYM_PHYS(l3_identmap),%edi
+ mov $(SYM_PHYS(l2_identmap)+7),%eax
+ mov $4,%ecx
+1: mov %eax,(%edi)
+ add $8,%edi
+ add $PAGE_SIZE,%eax
+ loop 1b
+ /* Initialise L3 xen-map page directory entry. */
+ mov $(SYM_PHYS(l2_xenmap)+7),%eax
+ mov %eax,SYM_PHYS(l3_xenmap) + (50*8)
+        /* Hook identity-map and xen-map L3 tables into PML4. */
+ mov $(SYM_PHYS(l3_identmap)+7),%eax
+ mov %eax,SYM_PHYS(idle_pg_table) + ( 0*8) /* PML4[ 0]: 1:1 map */
+ mov %eax,SYM_PHYS(idle_pg_table) + (262*8) /* PML4[262]: 1:1 map */
+ mov $(SYM_PHYS(l3_xenmap)+7),%eax
+ mov %eax,SYM_PHYS(idle_pg_table) + (261*8) /* PML4[261]: xen map */
+#elif defined(CONFIG_X86_PAE)
+ /* Initialize low and high mappings of memory with 2MB pages */
+ mov $SYM_PHYS(idle_pg_table_l2),%edi
+ mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */
+1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $DIRECTMAP_PHYS_END+0xe3,%eax
+ jne 1b
+1: stosl /* low mappings cover up to 16MB */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $(16<<20)+0xe3,%eax
+ jne 1b
+#else
+ /* Initialize low and high mappings of memory with 4MB pages */
+ mov $SYM_PHYS(idle_pg_table),%edi
+ mov $0xe3,%eax /* PRESENT+RW+A+D+4MB */
+1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $DIRECTMAP_PHYS_END+0xe3,%eax
+ jne 1b
+1: stosl /* low mappings cover up to 16MB */
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $(16<<20)+0xe3,%eax
+ jne 1b
+#endif
+
+ /* Copy bootstrap trampoline to low memory, below 1MB. */
+ mov $SYM_PHYS(trampoline_start),%esi
+ mov $SYM_TRAMP_PHYS(trampoline_start),%edi
+ mov $trampoline_end - trampoline_start,%ecx
+ rep movsb
+
+ /* EBX == 0 indicates we are the BP (Boot Processor). */
+ xor %ebx,%ebx
+
+ /* Jump into the relocated trampoline. */
+ jmp $TRAMP_CS32,$SYM_TRAMP_PHYS(trampoline_protmode_entry)
+
+ .globl trampoline_start, trampoline_end
+trampoline_start:
+#include "trampoline.S"
+trampoline_end:
+
+__high_start:
+#ifdef __x86_64__
+#include "x86_64.S"
+#else
+#include "x86_32.S"
+#endif
--- /dev/null
+ .code16
+
+ .globl trampoline_realmode_entry
+trampoline_realmode_entry:
+ nop # We use this byte as a progress flag
+ movb $0xA5,trampoline_cpu_started - trampoline_start
+ cld
+ cli
+ lidt %cs:idt_48 - trampoline_start
+ lgdt %cs:gdt_48 - trampoline_start
+ xor %ax, %ax
+ inc %ax
+ lmsw %ax # CR0.PE = 1 (enter protected mode)
+ mov $1,%bl # EBX != 0 indicates we are an AP
+ jmp 1f
+1: ljmpl $TRAMP_CS32,$SYM_TRAMP_PHYS(trampoline_protmode_entry)
+
+idt_48: .word 0, 0, 0 # base = limit = 0
+gdt_48: .word 4*8-1
+ .long SYM_TRAMP_PHYS(trampoline_gdt)
+trampoline_gdt:
+ .quad 0x0000000000000000 /* 0x0000: unused */
+ .quad 0x00cf9a000000ffff /* 0x0008: ring 0 code, 32-bit mode */
+ .quad 0x00af9a000000ffff /* 0x0010: ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0x0018: ring 0 data */
+
+cpuid_ext_features:
+ .long 0
+
+ .globl trampoline_xen_phys_start
+trampoline_xen_phys_start:
+ .long 0
+
+ .globl trampoline_cpu_started
+trampoline_cpu_started:
+ .byte 0
+
+ .code32
+trampoline_protmode_entry:
+ /* Set up a few descriptors: on entry only CS is guaranteed good. */
+ mov $TRAMP_DS,%eax
+ mov %eax,%ds
+ mov %eax,%es
+
+ /* Set up FPU. */
+ fninit
+
+ /* Initialise CR4. */
+#if CONFIG_PAGING_LEVELS == 2
+ mov $X86_CR4_PSE,%ecx
+#else
+ mov $X86_CR4_PAE,%ecx
+#endif
+ mov %ecx,%cr4
+
+ /* Load pagetable base register. */
+ mov $SYM_PHYS(idle_pg_table),%eax
+ add SYM_TRAMP_PHYS(trampoline_xen_phys_start),%eax
+ mov %eax,%cr3
+
+#if CONFIG_PAGING_LEVELS != 2
+ /* Set up EFER (Extended Feature Enable Register). */
+ movl $MSR_EFER,%ecx
+ rdmsr
+#if CONFIG_PAGING_LEVELS == 4
+ btsl $_EFER_LME,%eax /* Long Mode */
+ btsl $_EFER_SCE,%eax /* SYSCALL/SYSRET */
+#endif
+ mov SYM_TRAMP_PHYS(cpuid_ext_features),%edi
+ btl $20,%edi /* CPUID 0x80000001, EDX[20] */
+ jnc 1f
+ btsl $_EFER_NX,%eax /* No-Execute */
+1: wrmsr
+#endif
+
+ mov $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
+ mov %eax,%cr0
+ jmp 1f
+1:
+
+#if defined(__x86_64__)
+
+ /* Now in compatibility mode. Long-jump into 64-bit mode. */
+ ljmp $TRAMP_CS64,$SYM_TRAMP_PHYS(start64)
+
+ .code64
+start64:
+ /* Jump to high mappings. */
+ mov high_start(%rip),%rax
+ jmpq *%rax
+
+high_start:
+ .quad __high_start
+
+#else /* !defined(__x86_64__) */
+
+ /* Install relocated selectors. */
+ lgdt gdt_descr
+ mov $(__HYPERVISOR_DS),%eax
+ mov %eax,%ds
+ mov %eax,%es
+ mov %eax,%fs
+ mov %eax,%gs
+ mov %eax,%ss
+ ljmp $(__HYPERVISOR_CS),$__high_start
+
+#endif
-#include <xen/config.h>
-#include <xen/multiboot.h>
-#include <public/xen.h>
-#include <asm/asm_defns.h>
-#include <asm/desc.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-
-#define SECONDARY_CPU_FLAG 0xA5A5A5A5
-
- .text
-
-ENTRY(start)
- jmp __start
-
- .align 4
-
-/*** MULTIBOOT HEADER ****/
-#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
- MULTIBOOT_HEADER_WANT_MEMORY)
- /* Magic number indicating a Multiboot header. */
- .long MULTIBOOT_HEADER_MAGIC
- /* Flags to bootloader (see Multiboot spec). */
- .long MULTIBOOT_HEADER_FLAGS
- /* Checksum: must be the negated sum of the first two fields. */
- .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
+ .code32
-not_multiboot_msg:
- .asciz "ERR: Not a Multiboot bootloader!"
-not_multiboot:
- mov $not_multiboot_msg-__PAGE_OFFSET,%esi
- mov $0xB8000,%edi # VGA framebuffer
-1: mov (%esi),%bl
- test %bl,%bl # Terminate on '\0' sentinel
-2: je 2b
- mov $0x3f8+5,%dx # UART Line Status Register
-3: in %dx,%al
- test $0x20,%al # Test THR Empty flag
- je 3b
- mov $0x3f8+0,%dx # UART Transmit Holding Register
- mov %bl,%al
- out %al,%dx # Send a character over the serial line
- movsb # Write a character to the VGA framebuffer
- mov $7,%al
- stosb # Write an attribute to the VGA framebuffer
- jmp 1b
+ /* Enable full CR4 features. */
+ mov mmu_cr4_features,%eax
+ mov %eax,%cr4
+
+ /* Initialise stack. */
+ mov stack_start,%esp
+ or $(STACK_SIZE-CPUINFO_sizeof),%esp
-__start:
- /* Set up a few descriptors: on entry only CS is guaranteed good. */
- lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET
- mov $(__HYPERVISOR_DS),%ecx
- mov %ecx,%ds
- mov %ecx,%es
- mov %ecx,%fs
- mov %ecx,%gs
- ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
-1: lss stack_start-__PAGE_OFFSET,%esp
- add $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
-
/* Reset EFLAGS (subsumes CLI and CLD). */
pushl $0
popf
- /* Set up FPU. */
- fninit
-
- /* Set up CR4, except global flag which Intel requires should be */
- /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
- mov mmu_cr4_features-__PAGE_OFFSET,%ecx
- and $0x7f,%cl # CR4.PGE (global enable)
- mov %ecx,%cr4
-
- cmp $(SECONDARY_CPU_FLAG),%ebx
- je start_paging
-
- /* Check for Multiboot bootloader */
- cmp $0x2BADB002,%eax
- jne not_multiboot
-
- /* Initialize BSS (no nasty surprises!) */
- mov $__bss_start-__PAGE_OFFSET,%edi
- mov $_end-__PAGE_OFFSET,%ecx
- sub %edi,%ecx
- xor %eax,%eax
- rep stosb
+ lidt idt_descr
- /* Save the Multiboot info structure for later use. */
- add $__PAGE_OFFSET,%ebx
- push %ebx
+ test %ebx,%ebx
+ jnz start_secondary
-#ifdef CONFIG_X86_PAE
- /* Initialize low and high mappings of all memory with 2MB pages */
- mov $idle_pg_table_l2-__PAGE_OFFSET,%edi
- mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */
-1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
- stosl /* low mapping */
- add $4,%edi
- add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $DIRECTMAP_PHYS_END+0xe3,%eax
- jne 1b
-1: stosl /* low mappings cover as much physmem as possible */
- add $4,%edi
- add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $HYPERVISOR_VIRT_START+0xe3,%eax
- jne 1b
-#else
- /* Initialize low and high mappings of all memory with 4MB pages */
- mov $idle_pg_table-__PAGE_OFFSET,%edi
- mov $0xe3,%eax /* PRESENT+RW+A+D+4MB */
-1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
- stosl /* low mapping */
- add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $DIRECTMAP_PHYS_END+0xe3,%eax
- jne 1b
-1: stosl /* low mappings cover as much physmem as possible */
- add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $HYPERVISOR_VIRT_START+0xe3,%eax
- jne 1b
-#endif
-
/* Initialise IDT with simple error defaults. */
lea ignore_int,%edx
mov $(__HYPERVISOR_CS << 16),%eax
mov %dx,%ax /* selector = 0x0010 = cs */
mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
- lea idt_table-__PAGE_OFFSET,%edi
+ lea idt_table,%edi
mov $256,%ecx
1: mov %eax,(%edi)
mov %edx,4(%edi)
add $8,%edi
loop 1b
-
-start_paging:
-#ifdef CONFIG_X86_PAE
- /* Enable Execute-Disable (NX/XD) support if it is available. */
- push %ebx
- mov $0x80000000,%eax
- cpuid
- cmp $0x80000000,%eax /* Any function > 0x80000000? */
- jbe no_execute_disable
- mov $0x80000001,%eax
- cpuid
- bt $20,%edx /* Execute Disable? */
- jnc no_execute_disable
- movl $MSR_EFER,%ecx
- rdmsr
- bts $_EFER_NX,%eax
- wrmsr
-no_execute_disable:
- pop %ebx
-#endif
- mov $idle_pg_table-__PAGE_OFFSET,%eax
- mov %eax,%cr3
- mov $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
- mov %eax,%cr0
- jmp 1f
-1: /* Install relocated selectors (FS/GS unused). */
- lgdt gdt_descr
- mov $(__HYPERVISOR_DS),%ecx
- mov %ecx,%ds
- mov %ecx,%es
- mov %ecx,%ss
- ljmp $(__HYPERVISOR_CS),$1f
-1: /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
- mov mmu_cr4_features,%ecx
- mov %ecx,%cr4
- /* Relocate ESP */
- add $__PAGE_OFFSET,%esp
-
- lidt idt_descr
- cmp $(SECONDARY_CPU_FLAG),%ebx
- je start_secondary
-
- /* Call into main C routine. This should never return.*/
+ /* Pass off the Multiboot info structure to C land. */
+ mov multiboot_ptr,%eax
+ add $__PAGE_OFFSET,%eax
+ push %eax
call __start_xen
ud2 /* Force a panic (invalid opcode). */
call printk
1: jmp 1b
-/*** STACK LOCATION ***/
-
ENTRY(stack_start)
.long cpu0_stack
- .long __HYPERVISOR_DS
/*** DESCRIPTOR TABLES ***/
ALIGN
+multiboot_ptr:
+ .long 0
.word 0
idt_descr:
.word 0
nopaging_gdt_descr:
.word LAST_RESERVED_GDT_BYTE
- .long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
-
+ .long SYM_PHYS(gdt_table) - FIRST_RESERVED_GDT_BYTE
+
.align PAGE_SIZE, 0
/* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
/* the machine->physical mapping table. Ring 0 can access all memory. */
.quad 0x0000000000000000 /* unused */
.fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
- .align PAGE_SIZE, 0
-
#ifdef CONFIG_X86_PAE
+ .align 32
ENTRY(idle_pg_table)
-ENTRY(idle_pg_table_l3)
- .long idle_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
- .long idle_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
- .long idle_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
- .long idle_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-.section ".bss.page_aligned","w"
-ENTRY(idle_pg_table_l2)
- .fill 4*PAGE_SIZE,1,0
-#else
-.section ".bss.page_aligned","w"
-ENTRY(idle_pg_table)
-ENTRY(idle_pg_table_l2)
- .fill 1*PAGE_SIZE,1,0
+ .long SYM_PHYS(idle_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
+ .long SYM_PHYS(idle_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
+ .long SYM_PHYS(idle_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
+ .long SYM_PHYS(idle_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
#endif
-
-.section ".bss.stack_aligned","w"
-ENTRY(cpu0_stack)
- .fill STACK_SIZE,1,0
-#include <xen/config.h>
-#include <xen/multiboot.h>
-#include <public/xen.h>
-#include <asm/asm_defns.h>
-#include <asm/desc.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-
-#define SECONDARY_CPU_FLAG 0xA5A5A5A5
-
- .text
- .code32
-
-#define SYM_PHYS(sym) (sym - __PAGE_OFFSET)
-
-ENTRY(start)
- jmp __start
-
- .org 0x004
-/*** MULTIBOOT HEADER ****/
-#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
- MULTIBOOT_HEADER_WANT_MEMORY)
- /* Magic number indicating a Multiboot header. */
- .long MULTIBOOT_HEADER_MAGIC
- /* Flags to bootloader (see Multiboot spec). */
- .long MULTIBOOT_HEADER_FLAGS
- /* Checksum: must be the negated sum of the first two fields. */
- .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
-
-.Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
-.Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
-
-bad_cpu:
- mov $(SYM_PHYS(.Lbad_cpu_msg)),%esi # Error message
- jmp print_err
-not_multiboot:
- mov $(SYM_PHYS(.Lbad_ldr_msg)),%esi # Error message
-print_err:
- mov $0xB8000,%edi # VGA framebuffer
-1: mov (%esi),%bl
- test %bl,%bl # Terminate on '\0' sentinel
-2: je 2b
- mov $0x3f8+5,%dx # UART Line Status Register
-3: in %dx,%al
- test $0x20,%al # Test THR Empty flag
- je 3b
- mov $0x3f8+0,%dx # UART Transmit Holding Register
- mov %bl,%al
- out %al,%dx # Send a character over the serial line
- movsb # Write a character to the VGA framebuffer
- mov $7,%al
- stosb # Write an attribute to the VGA framebuffer
- jmp 1b
-
-__start:
- cld
- cli
+ .code64
- /* Set up a few descriptors: on entry only CS is guaranteed good. */
- lgdt %cs:SYM_PHYS(nopaging_gdt_descr)
- mov $(__HYPERVISOR_DS32),%ecx
+ /* Install relocated data selectors. */
+ lgdt gdt_descr(%rip)
+ mov $(__HYPERVISOR_DS64),%ecx
mov %ecx,%ds
mov %ecx,%es
-
- cmp $(SECONDARY_CPU_FLAG),%ebx
- je skip_boot_checks
-
- /* Check for Multiboot bootloader */
- cmp $0x2BADB002,%eax
- jne not_multiboot
-
- /* Save the Multiboot info structure for later use. */
- mov %ebx,SYM_PHYS(multiboot_ptr)
-
- /* We begin by interrogating the CPU for the presence of long mode. */
- mov $0x80000000,%eax
- cpuid
- cmp $0x80000000,%eax # any function > 0x80000000?
- jbe bad_cpu
- mov $0x80000001,%eax
- cpuid
- bt $29,%edx # Long mode feature?
- jnc bad_cpu
- mov %edx,SYM_PHYS(cpuid_ext_features)
-skip_boot_checks:
-
- /* Set up FPU. */
- fninit
-
- /* Enable PAE in CR4. */
- mov $0x20,%ecx # X86_CR4_PAE
- mov %ecx,%cr4
-
- /* Load pagetable base register. */
- mov $SYM_PHYS(idle_pg_table),%eax
- mov %eax,%cr3
-
- /* Set up EFER (Extended Feature Enable Register). */
- movl $MSR_EFER,%ecx
- rdmsr
- btsl $_EFER_LME,%eax /* Long Mode */
- btsl $_EFER_SCE,%eax /* SYSCALL/SYSRET */
- mov SYM_PHYS(cpuid_ext_features),%edi
- btl $20,%edi /* CPUID 0x80000001, EDX[20] */
- jnc 1f
- btsl $_EFER_NX,%eax /* No-Execute */
-1: wrmsr
-
- mov $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
- mov %eax,%cr0
- jmp 1f
-
-1: /* Now in compatibility mode. Long-jump into 64-bit mode. */
- ljmp $(__HYPERVISOR_CS64),$SYM_PHYS(start64)
-
- .code64
-start64:
- /* Install relocated selectors (FS/GS unused). */
- lgdt gdt_descr(%rip)
+ mov %ecx,%fs
+ mov %ecx,%gs
+ mov %ecx,%ss
/* Enable full CR4 features. */
mov mmu_cr4_features(%rip),%rcx
pushq $0
popf
- /* Jump to high mappings. */
- mov high_start(%rip),%rax
- push %rax
- ret
-__high_start:
-
- mov $(__HYPERVISOR_DS64),%ecx
- mov %ecx,%ds
- mov %ecx,%es
- mov %ecx,%fs
- mov %ecx,%gs
- mov %ecx,%ss
-
- lidt idt_descr(%rip)
-
- cmp $(SECONDARY_CPU_FLAG),%ebx
- je start_secondary
+ /* Reload code selector. */
+ pushq $(__HYPERVISOR_CS64)
+ leaq 1f(%rip),%rax
+ pushq %rax
+ lretq
+1: lidt idt_descr(%rip)
- /* Initialize BSS (no nasty surprises!) */
- lea __bss_start(%rip),%rdi
- lea _end(%rip),%rcx
- sub %rdi,%rcx
- xor %rax,%rax
- rep stosb
+ test %ebx,%ebx
+ jnz start_secondary
/* Initialise IDT with simple error defaults. */
leaq ignore_int(%rip),%rcx
multiboot_ptr:
.long 0
- .word 0
-nopaging_gdt_descr:
- .word LAST_RESERVED_GDT_BYTE
- .quad gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
-
-cpuid_ext_features:
- .long 0
-
.word 0
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
ENTRY(stack_start)
.quad cpu0_stack
-high_start:
- .quad __high_start
-
.align PAGE_SIZE, 0
ENTRY(gdt_table)
.quad 0x0000000000000000 /* unused */
.org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
.fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
-#ifdef CONFIG_COMPAT
.align PAGE_SIZE, 0
/* NB. Even rings != 0 get access to the full 4Gb, as only the */
/* (compatibility) machine->physical mapping table lives there. */
.quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
.org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
.fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
-# undef LIMIT
-#endif
-
-/* Initial PML4 -- level-4 page table. */
- .align PAGE_SIZE, 0
-ENTRY(idle_pg_table)
-ENTRY(idle_pg_table_4)
- .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[0]
- .fill 261,8,0
- .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[262]
-
-/* Initial PDP -- level-3 page table. */
- .align PAGE_SIZE, 0
-ENTRY(idle_pg_table_l3)
- .quad idle_pg_table_l2 - __PAGE_OFFSET + 7
-
-/* Initial PDE -- level-2 page table. Maps first 1GB physical memory. */
- .align PAGE_SIZE, 0
-ENTRY(idle_pg_table_l2)
- .macro identmap from=0, count=512
- .if \count-1
- identmap "(\from+0)","(\count/2)"
- identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
- .else
- .quad 0x00000000000001e3 + \from
- .endif
- .endm
- identmap
-
- .align PAGE_SIZE, 0
-
-.section ".bss.stack_aligned","w"
-ENTRY(cpu0_stack)
- .fill STACK_SIZE,1,0
}
} /* add_memory_region */
-static void __init print_e820_memory_map(struct e820entry *map, int entries)
+/*static*/ void __init print_e820_memory_map(struct e820entry *map, int entries)
{
int i;
prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
if ( mfn_valid(prev_mfn) )
{
- if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
+ if ( is_xen_heap_frame(mfn_to_page(prev_mfn)) )
/* Xen heap frames are simply unhooked from this phys slot. */
guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
else
void memguard_init(void)
{
map_pages_to_xen(
- PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
+ (unsigned long)__va(xen_phys_start),
+ xen_phys_start >> PAGE_SHIFT,
+ (xenheap_phys_end - xen_phys_start) >> PAGE_SHIFT,
__PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+#ifdef __x86_64__
+ map_pages_to_xen(
+ XEN_VIRT_START,
+ xen_phys_start >> PAGE_SHIFT,
+ (__pa(&_end) + PAGE_SIZE - 1 - xen_phys_start) >> PAGE_SHIFT,
+ __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+#endif
}
static void __memguard_change_range(void *p, unsigned long l, int guard)
#include <acm/acm_hooks.h>
#include <xen/kexec.h>
+#if defined(CONFIG_X86_64)
+#define BOOTSTRAP_DIRECTMAP_END (1UL << 32)
+#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
+#else
+#define BOOTSTRAP_DIRECTMAP_END HYPERVISOR_VIRT_START
+#define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
+#endif
+
extern void dmi_scan_machine(void);
extern void generic_apic_probe(void);
extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
cpumask_t cpu_present_map;
+unsigned long xen_phys_start;
+
/* Limits of Xen heap, used to initialise the allocator. */
unsigned long xenheap_phys_start, xenheap_phys_end;
struct tss_struct init_tss[NR_CPUS];
-extern unsigned long cpu0_stack[];
+char __attribute__ ((__section__(".bss.page_aligned"))) cpu0_stack[STACK_SIZE];
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
int acpi_force;
char acpi_param[10] = "";
-static void parse_acpi_param(char *s)
+static void __init parse_acpi_param(char *s)
{
/* Save the parameter so it can be propagated to domain0. */
safe_strcpy(acpi_param, s);
(*call)();
}
-#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
+#define EARLY_FAIL(f, a...) do { \
+ printk( f , ## a ); \
+ for ( ; ; ) __asm__ __volatile__ ( "hlt" ); \
+} while (0)
-static struct e820entry e820_raw[E820MAX];
+static struct e820entry __initdata e820_raw[E820MAX];
-static unsigned long initial_images_start, initial_images_end;
+static unsigned long __initdata initial_images_start, initial_images_end;
-unsigned long initial_images_nrpages(void)
+unsigned long __init initial_images_nrpages(void)
{
unsigned long s = initial_images_start + PAGE_SIZE - 1;
unsigned long e = initial_images_end;
return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT));
}
-void discard_initial_images(void)
+void __init discard_initial_images(void)
{
init_domheap_pages(initial_images_start, initial_images_end);
}
static void __init percpu_init_areas(void)
{
unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
+ unsigned int first_unused;
BUG_ON(data_size > PERCPU_SIZE);
- for_each_cpu ( i )
- {
- memguard_unguard_range(__per_cpu_start + (i << PERCPU_SHIFT),
- 1 << PERCPU_SHIFT);
- if ( i != 0 )
- memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
- __per_cpu_start,
- data_size);
- }
-}
-
-static void __init percpu_guard_areas(void)
-{
- memguard_guard_range(__per_cpu_start, __per_cpu_end - __per_cpu_start);
-}
-
-static void __init percpu_free_unused_areas(void)
-{
- unsigned int i, first_unused;
-
- /* Find first unused CPU number. */
- for ( i = 0; i < NR_CPUS; i++ )
- if ( !cpu_possible(i) )
- break;
+ /* Initialise per-cpu data area for all possible secondary CPUs. */
+ for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
+ memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
+ __per_cpu_start,
+ data_size);
first_unused = i;
/* Check that there are no holes in cpu_possible_map. */
}
/* Fetch acm policy module from multiboot modules. */
-static void extract_acm_policy(
+static void __init extract_acm_policy(
multiboot_info_t *mbi,
unsigned int *initrdidx,
char **_policy_start,
for ( i = mbi->mods_count-1; i >= 1; i-- )
{
start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
-#if defined(__i386__)
- policy_start = (char *)start;
-#elif defined(__x86_64__)
- policy_start = __va(start);
-#endif
+ policy_start = maddr_to_bootstrap_virt(start);
policy_len = mod[i].mod_end - mod[i].mod_start;
if ( acm_is_policy(policy_start, policy_len) )
{
setup_idle_pagetable();
}
-static void srat_detect_node(int cpu)
+static void __init srat_detect_node(int cpu)
{
unsigned node;
u8 apicid = x86_cpu_to_apicid[cpu];
printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
}
-void __init move_memory(unsigned long dst,
- unsigned long src_start, unsigned long src_end)
+static void __init move_memory(
+ unsigned long dst, unsigned long src_start, unsigned long src_end)
{
-#if defined(CONFIG_X86_32)
- memmove((void *)dst, /* use low mapping */
- (void *)src_start, /* use low mapping */
+ memmove(maddr_to_bootstrap_virt(dst),
+ maddr_to_bootstrap_virt(src_start),
src_end - src_start);
-#elif defined(CONFIG_X86_64)
- memmove(__va(dst),
- __va(src_start),
- src_end - src_start);
-#endif
+}
+
+/* A temporary copy of the e820 map that we can mess with during bootstrap. */
+static struct e820map __initdata boot_e820;
+
+/* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
+static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
+{
+ unsigned long rs, re;
+ int i;
+
+ for ( i = 0; i < boot_e820.nr_map; i++ )
+ {
+ /* Have we found the e820 region that includes the specified range? */
+ rs = boot_e820.map[i].addr;
+ re = boot_e820.map[i].addr + boot_e820.map[i].size;
+ if ( (s < rs) || (e > re) )
+ continue;
+
+ /* Start fragment. */
+ boot_e820.map[i].size = s - rs;
+
+ /* End fragment. */
+ if ( e < re )
+ {
+ memmove(&boot_e820.map[i+1], &boot_e820.map[i],
+ (boot_e820.nr_map-i) * sizeof(boot_e820.map[0]));
+ boot_e820.nr_map++;
+ i++;
+ boot_e820.map[i].addr = e;
+ boot_e820.map[i].size = re - e;
+ }
+ }
}
void __init __start_xen(multiboot_info_t *mbi)
unsigned long _policy_len = 0;
module_t *mod = (module_t *)__va(mbi->mods_addr);
unsigned long nr_pages, modules_length;
- paddr_t s, e;
int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
struct ns16550_defaults ns16550 = {
.data_bits = 8,
/* Check that we have at least one Multiboot module. */
if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
- {
- printk("FATAL ERROR: dom0 kernel not specified."
- " Check bootloader configuration.\n");
- EARLY_FAIL();
- }
+ EARLY_FAIL("dom0 kernel not specified. "
+ "Check bootloader configuration.\n");
if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
- {
- printk("FATAL ERROR: Misaligned CPU0 stack.\n");
- EARLY_FAIL();
- }
+ EARLY_FAIL("Misaligned CPU0 stack.\n");
/*
* Since there are some stubs getting built on the stacks which use
*/
if ( opt_xenheap_megabytes > 2048 )
opt_xenheap_megabytes = 2048;
- xenheap_phys_end = opt_xenheap_megabytes << 20;
if ( mbi->flags & MBI_MEMMAP )
{
}
else
{
- printk("FATAL ERROR: Bootloader provided no memory information.\n");
- for ( ; ; ) ;
+ EARLY_FAIL("Bootloader provided no memory information.\n");
}
if ( e820_warn )
/* Sanitise the raw E820 map to produce a final clean version. */
max_page = init_e820(e820_raw, &e820_raw_nr);
- modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
-
- /* Find a large enough RAM extent to stash the DOM0 modules. */
- for ( i = 0; ; i++ )
+ /*
+ * Create a temporary copy of the E820 map. Truncate it to above 16MB
+ * as anything below that is already mapped and has a statically-allocated
+ * purpose.
+ */
+ memcpy(&boot_e820, &e820, sizeof(e820));
+ for ( i = 0; i < boot_e820.nr_map; i++ )
{
- if ( i == e820.nr_map )
+ uint64_t s, e, min = 16 << 20; /* 16MB */
+ s = boot_e820.map[i].addr;
+ e = boot_e820.map[i].addr + boot_e820.map[i].size;
+ if ( s >= min )
+ continue;
+ if ( e > min )
{
- printk("Not enough memory to stash the DOM0 kernel image.\n");
- for ( ; ; ) ;
+ boot_e820.map[i].addr = min;
+ boot_e820.map[i].size = e - min;
}
-
- if ( (e820.map[i].type == E820_RAM) &&
- (e820.map[i].size >= modules_length) &&
- ((e820.map[i].addr + e820.map[i].size) >=
- (xenheap_phys_end + modules_length)) )
- break;
+ else
+ boot_e820.map[i].type = E820_RESERVED;
}
- /* Stash as near as possible to the beginning of the RAM extent. */
- initial_images_start = e820.map[i].addr;
- if ( initial_images_start < xenheap_phys_end )
- initial_images_start = xenheap_phys_end;
- initial_images_end = initial_images_start + modules_length;
-
- move_memory(initial_images_start,
- mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
-
- /* Initialise boot-time allocator with all RAM situated after modules. */
- xenheap_phys_start = init_boot_allocator(__pa(&_end));
- nr_pages = 0;
- for ( i = 0; i < e820.nr_map; i++ )
+ /*
+ * Iterate over all superpage-aligned RAM regions.
+ *
+ * We require superpage alignment because the boot allocator is not yet
+ * initialised. Hence we can only map superpages in the address range
+ * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
+ * dynamic allocation of pagetables.
+ *
+ * As well as mapping superpages in that range, in preparation for
+ * initialising the boot allocator, we also look for a region to which
+ * we can relocate the dom0 kernel and other multiboot modules. Also, on
+ * x86/64, we relocate Xen to higher memory.
+ */
+ modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
+ for ( i = 0; i < boot_e820.nr_map; i++ )
{
- if ( e820.map[i].type != E820_RAM )
+ uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+
+ /* Superpage-aligned chunks up to BOOTSTRAP_DIRECTMAP_END, please. */
+ s = (boot_e820.map[i].addr + mask) & ~mask;
+ e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+ e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
+ if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
continue;
- nr_pages += e820.map[i].size >> PAGE_SHIFT;
+ /* Map the chunk. No memory will need to be allocated to do this. */
+ map_pages_to_xen(
+ (unsigned long)maddr_to_bootstrap_virt(s),
+ s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
- /* Initialise boot heap, skipping Xen heap and dom0 modules. */
- s = e820.map[i].addr;
- e = s + e820.map[i].size;
- if ( s < xenheap_phys_end )
- s = xenheap_phys_end;
- if ( (s < initial_images_end) && (e > initial_images_start) )
- s = initial_images_end;
- init_boot_pages(s, e);
+ /* Is the region suitable for relocating the multiboot modules? */
+ if ( !initial_images_start && ((e-s) >= modules_length) )
+ {
+ e -= modules_length;
+ e &= ~mask;
+ initial_images_start = e;
+ initial_images_end = initial_images_start + modules_length;
+ move_memory(initial_images_start,
+ mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
+ if ( s >= e )
+ continue;
+ }
#if defined(CONFIG_X86_64)
- /*
- * x86/64 maps all registered RAM. Points to note:
- * 1. The initial pagetable already maps low 1GB, so skip that.
- * 2. We must map *only* RAM areas, taking care to avoid I/O holes.
- * Failure to do this can cause coherency problems and deadlocks
- * due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
- */
+ /* Is the region suitable for relocating Xen? */
+ if ( !xen_phys_start && (((e-s) >> 20) >= opt_xenheap_megabytes) )
{
- /* Calculate page-frame range, discarding partial frames. */
- unsigned long start, end;
- unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */
- start = PFN_UP(e820.map[i].addr);
- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
- /* Clip the range to exclude what the bootstrapper initialised. */
- if ( start < init_mapped )
- start = init_mapped;
- if ( end <= start )
- continue;
- /* Request the mapping. */
- map_pages_to_xen(
- PAGE_OFFSET + (start << PAGE_SHIFT),
- start, end-start, PAGE_HYPERVISOR);
+ extern l2_pgentry_t l2_xenmap[];
+ l4_pgentry_t *pl4e;
+ l3_pgentry_t *pl3e;
+ l2_pgentry_t *pl2e;
+ int i, j;
+
+ /* Select relocation address. */
+ e = (e - (opt_xenheap_megabytes << 20)) & ~mask;
+ xen_phys_start = e;
+ boot_trampoline_va(trampoline_xen_phys_start) = e;
+
+ /*
+ * Perform relocation to new physical address.
+ * Before doing so we must sync static/global data with main memory
+ * with a barrier(). After this we must *not* modify static/global
+ * data until after we have switched to the relocated pagetables!
+ */
+ barrier();
+ move_memory(e, 0, __pa(&_end) - xen_phys_start);
+
+ /* Walk initial pagetables, relocating page directory entries. */
+ pl4e = __va(__pa(idle_pg_table));
+ for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
+ {
+ if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
+ continue;
+ *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
+ xen_phys_start);
+ pl3e = l4e_to_l3e(*pl4e);
+ for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
+ {
+ /* Not present or already relocated? */
+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
+ (l3e_get_pfn(*pl3e) > 0x1000) )
+ continue;
+ *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
+ xen_phys_start);
+ }
+ }
+
+ /* The only data mappings to be relocated are in the Xen area. */
+ pl2e = __va(__pa(l2_xenmap));
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
+ {
+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+ continue;
+ *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
+ xen_phys_start);
+ }
+
+ /* Re-sync the stack and then switch to relocated pagetables. */
+ asm volatile (
+ "rep movsb ; " /* re-sync the stack */
+ "movq %%cr4,%%rsi ; "
+ "andb $0x7f,%%sil ; "
+ "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
+ "movq %0,%%cr3 ; " /* CR3 == new pagetables */
+ "orb $0x80,%%sil ; "
+ "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
+ : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
+ "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
}
#endif
}
- if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0)
+ if ( !initial_images_start )
+ EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
+ reserve_in_boot_e820(initial_images_start, initial_images_end);
+
+ /*
+ * With modules (and Xen itself, on x86/64) relocated out of the way, we
+ * can now initialise the boot allocator with some memory.
+ */
+ xenheap_phys_start = init_boot_allocator(__pa(&_end));
+ xenheap_phys_end = opt_xenheap_megabytes << 20;
+#if defined(CONFIG_X86_64)
+ if ( !xen_phys_start )
+ EARLY_FAIL("Not enough memory to relocate Xen.\n");
+ xenheap_phys_end += xen_phys_start;
+ reserve_in_boot_e820(xen_phys_start,
+ xen_phys_start + (opt_xenheap_megabytes<<20));
+ init_boot_pages(1<<20, 16<<20); /* Initial seed: 15MB */
+#else
+ init_boot_pages(xenheap_phys_end, 16<<20); /* Initial seed: 4MB */
+#endif
+
+ /*
+ * With the boot allocator now seeded, we can walk every RAM region and
+ * map it in its entirety (on x86/64, at least) and notify it to the
+ * boot allocator.
+ */
+ for ( i = 0; i < boot_e820.nr_map; i++ )
+ {
+ uint64_t s, e, map_e, mask = PAGE_SIZE - 1;
+
+ /* Only page alignment required now. */
+ s = (boot_e820.map[i].addr + mask) & ~mask;
+ e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+ if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
+ continue;
+
+ /* Perform the mapping (truncated in 32-bit mode). */
+ map_e = e;
+#if defined(CONFIG_X86_32)
+ map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
+#endif
+ if ( s < map_e )
+ map_pages_to_xen(
+ (unsigned long)maddr_to_bootstrap_virt(s),
+ s >> PAGE_SHIFT, (map_e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+
+ init_boot_pages(s, e);
+ }
+
+ if ( (kexec_crash_area.size > 0) && (kexec_crash_area.start > 0) )
{
unsigned long kdump_start, kdump_size, k;
#if defined(CONFIG_X86_32)
/* Must allocate within bootstrap 1:1 limits. */
- k = alloc_boot_low_pages(k, 1); /* 0x0 - HYPERVISOR_VIRT_START */
+ k = alloc_boot_low_pages(k, 1); /* 0x0 - BOOTSTRAP_DIRECTMAP_END */
#else
k = alloc_boot_pages(k, 1);
#endif
}
memguard_init();
- percpu_guard_areas();
+ nr_pages = 0;
+ for ( i = 0; i < e820.nr_map; i++ )
+ if ( e820.map[i].type == E820_RAM )
+ nr_pages += e820.map[i].size >> PAGE_SHIFT;
printk("System RAM: %luMB (%lukB)\n",
nr_pages >> (20 - PAGE_SHIFT),
nr_pages << (PAGE_SHIFT - 10));
numa_initmem_init(0, max_page);
/* Initialise the Xen heap, skipping RAM holes. */
- nr_pages = 0;
- for ( i = 0; i < e820.nr_map; i++ )
- {
- if ( e820.map[i].type != E820_RAM )
- continue;
-
- s = e820.map[i].addr;
- e = s + e820.map[i].size;
- if ( s < xenheap_phys_start )
- s = xenheap_phys_start;
- if ( e > xenheap_phys_end )
- e = xenheap_phys_end;
-
- if ( s < e )
- {
- nr_pages += (e - s) >> PAGE_SHIFT;
- init_xenheap_pages(s, e);
- }
- }
-
+ init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
+ nr_pages = (xenheap_phys_end - xenheap_phys_start) >> PAGE_SHIFT;
+#ifdef __x86_64__
+ init_xenheap_pages(xen_phys_start, __pa(&_start));
+ nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
+#endif
+ xenheap_phys_start = xen_phys_start;
printk("Xen heap: %luMB (%lukB)\n",
nr_pages >> (20 - PAGE_SHIFT),
nr_pages << (PAGE_SHIFT - 10));
find_smp_config();
- smp_alloc_memory();
-
dmi_scan_machine();
generic_apic_probe();
printk("Brought up %ld CPUs\n", (long)num_online_cpus());
smp_cpus_done(max_cpus);
- percpu_free_unused_areas();
-
initialise_gdb(); /* could be moved earlier */
do_initcalls();
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
-static inline int set_kernel_exec(unsigned long x, int y) { return 0; }
-#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
+#define set_kernel_exec(x, y) (0)
+#define setup_trampoline() (boot_trampoline_pa(trampoline_realmode_entry))
/* Set if we find a B stepping CPU */
static int __devinitdata smp_b_stepping;
{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
-static unsigned char *trampoline_base;
-static int trampoline_exec;
-
static void map_cpu_to_logical_apicid(void);
-/* State of each CPU. */
-/*DEFINE_PER_CPU(int, cpu_state) = { 0 };*/
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __devinit setup_trampoline(void)
-{
- memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_maddr(trampoline_base);
-}
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
- trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- if (__pa(trampoline_base) >= 0x9F000)
- BUG();
- /*
- * Make the SMP trampoline executable:
- */
- trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
-}
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
print_cpu_info(&cpu_data[cpu]);
Dprintk("CPU has booted.\n");
} else {
- boot_error= 1;
- if (*((volatile unsigned char *)trampoline_base)
- == 0xA5)
+ boot_error = 1;
+ mb();
+ if (boot_trampoline_va(trampoline_cpu_started) == 0xA5)
/* trampoline started but...? */
printk("Stuck ??\n");
else
}
/* mark "stuck" area as not stuck */
- *((volatile unsigned long *)trampoline_base) = 0;
+ boot_trampoline_va(trampoline_cpu_started) = 0;
+ mb();
return boot_error;
}
+++ /dev/null
-/*
- *
- * Trampoline.S Derived from Setup.S by Linus Torvalds
- *
- * 4 Jan 1997 Michael Chastain: changed to gnu as.
- *
- * Entry: CS:IP point to the start of our code, we are
- * in real mode with no stack, but the rest of the
- * trampoline page to make our stack and everything else
- * is a mystery.
- *
- * On entry to trampoline_data, the processor is in real mode
- * with 16-bit addressing and 16-bit data. CS has some value
- * and IP is zero. Thus, data addresses need to be absolute
- * (no relocation) and are taken with regard to r_base.
- */
-
-#include <xen/config.h>
-#include <public/xen.h>
-#include <asm/desc.h>
-#include <asm/page.h>
-
-#ifdef CONFIG_SMP
-
-.data
-
-.code16
-
-ENTRY(trampoline_data)
-r_base = .
- mov %cs, %ax # Code and data in the same place
- mov %ax, %ds
-
- movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline
- cli # We should be safe anyway
-
- movl $0xA5A5A5A5, trampoline_data - r_base
-
- lidt idt_48 - r_base # load idt with 0, 0
- lgdt gdt_48 - r_base # load gdt with whatever is appropriate
-
- xor %ax, %ax
- inc %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
- jmp flush_instr
-flush_instr:
-#if defined(__x86_64__)
- ljmpl $__HYPERVISOR_CS32, $0x100000 # 1MB
-#else
- ljmpl $__HYPERVISOR_CS, $0x100000 # 1MB
-#endif
-
-idt_48:
- .word 0 # idt limit = 0
- .word 0, 0 # idt base = 0L
-
-gdt_48:
- .word LAST_RESERVED_GDT_BYTE
-#ifdef __i386__
- .long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
-#else
- .long 0x101000 - FIRST_RESERVED_GDT_BYTE
-#endif
-
-ENTRY(trampoline_end)
-
-#endif /* CONFIG_SMP */
* GPR context. This is needed for some systems which (ab)use IN/OUT
* to communicate with BIOS code in system-management mode.
*/
+#ifdef __x86_64__
+ /* movq $host_to_guest_gpr_switch,%rcx */
+ io_emul_stub[0] = 0x48;
+ io_emul_stub[1] = 0xb9;
+ *(void **)&io_emul_stub[2] = (void *)host_to_guest_gpr_switch;
+ /* callq *%rcx */
+ io_emul_stub[10] = 0xff;
+ io_emul_stub[11] = 0xd1;
+#else
/* call host_to_guest_gpr_switch */
io_emul_stub[0] = 0xe8;
*(s32 *)&io_emul_stub[1] =
(char *)host_to_guest_gpr_switch - &io_emul_stub[5];
+ /* 7 x nop */
+ memset(&io_emul_stub[5], 0x90, 7);
+#endif
/* data16 or nop */
- io_emul_stub[5] = (op_bytes != 2) ? 0x90 : 0x66;
+ io_emul_stub[12] = (op_bytes != 2) ? 0x90 : 0x66;
/* <io-access opcode> */
- io_emul_stub[6] = opcode;
+ io_emul_stub[13] = opcode;
/* imm8 or nop */
- io_emul_stub[7] = 0x90;
- /* jmp guest_to_host_gpr_switch */
- io_emul_stub[8] = 0xe9;
- *(s32 *)&io_emul_stub[9] =
- (char *)guest_to_host_gpr_switch - &io_emul_stub[13];
+ io_emul_stub[14] = 0x90;
+ /* ret (jumps to guest_to_host_gpr_switch) */
+ io_emul_stub[15] = 0xc3;
/* Handy function-typed pointer to the stub. */
io_emul = (void *)io_emul_stub;
op_bytes = 1;
case 0xe5: /* IN imm8,%eax */
port = insn_fetch(u8, code_base, eip, code_limit);
- io_emul_stub[7] = port; /* imm8 */
+ io_emul_stub[14] = port; /* imm8 */
exec_in:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
op_bytes = 1;
case 0xe7: /* OUT %eax,imm8 */
port = insn_fetch(u8, code_base, eip, code_limit);
- io_emul_stub[7] = port; /* imm8 */
+ io_emul_stub[14] = port; /* imm8 */
exec_out:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
movl UREGS_esi(%eax), %esi
pushl %edi
movl UREGS_edi(%eax), %edi
+ pushl $guest_to_host_gpr_switch
pushl %ecx
movl UREGS_ecx(%eax), %ecx
movl UREGS_eax(%eax), %eax
#include <asm/fixmap.h>
#include <public/memory.h>
+/*
+ * Idle/boot L2 page directory for x86-32.  With PAE there are four L3
+ * entries, each needing its own L2 table (hence 4 * L2_PAGETABLE_ENTRIES);
+ * without PAE a single page directory suffices.  The .bss.page_aligned
+ * output section provides the required page alignment.
+ */
+#ifdef CONFIG_X86_PAE
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    idle_pg_table_l2[4 * L2_PAGETABLE_ENTRIES];
+#else
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    idle_pg_table_l2[L2_PAGETABLE_ENTRIES];
+#endif
+
unsigned int PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
#include <asm/msr.h>
#include <asm/page.h>
-.text
+#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START)
+
+ .text
.code64
test %r9,%r9
jnz 1b
- movq %rbx,%rdx
- mov $__PAGE_OFFSET,%rbx
- sub %rbx, %rdx
+ mov $SYM_PHYS(compat_page_list),%rdx
/*
* Setup an identity mapped region in PML4[0] of idle page
* table.
*/
- lea idle_pg_table_l3(%rip),%rax
+ lea l3_identmap(%rip),%rax
sub %rbx,%rax
or $0x63,%rax
mov %rax, idle_pg_table(%rip)
/* Switch to idle page table. */
- movq $(idle_pg_table - __PAGE_OFFSET), %rax
+ movq $SYM_PHYS(idle_pg_table), %rax
movq %rax, %cr3
/* Jump to low identity mapping in compatibility mode. */
ud2
compatibility_mode_far:
- .long compatibility_mode - __PAGE_OFFSET
+ .long SYM_PHYS(compatibility_mode)
.long __HYPERVISOR_CS32
.code32
movl %eax, %cr0
/* Switch to 32 bit page table. */
- movl $compat_pg_table - __PAGE_OFFSET, %eax
+ movl $SYM_PHYS(compat_pg_table), %eax
movl %eax, %cr3
/* Clear MSR_EFER[LME], disabling long mode */
* first 4G of the physical address space.
*/
compat_pg_table:
- .long compat_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
- .long compat_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
- .long compat_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
- .long compat_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
+ .long SYM_PHYS(compat_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
+ .long SYM_PHYS(compat_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
+ .long SYM_PHYS(compat_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
+ .long SYM_PHYS(compat_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
.align 4096,0
pushq %r15
movq UREGS_r11(%rdi), %r11
movq UREGS_r15(%rdi), %r15
+ pushq %rcx /* dummy push, filled by guest_to_host_gpr_switch pointer */
pushq %rcx
+ leaq guest_to_host_gpr_switch(%rip),%rcx
+ movq %rcx,8(%rsp)
movq UREGS_rcx(%rdi), %rcx
movq UREGS_rdi(%rdi), %rdi
ret
unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
#endif
+/* Top-level master (and idle-domain) page directory. */
+/* All tables below live in .bss.page_aligned, giving them the page
+ * alignment the paging hardware requires. */
+l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    idle_pg_table[L4_PAGETABLE_ENTRIES];
+
+/* Enough page directories to map bottom 4GB of the memory map. */
+l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l3_identmap[L3_PAGETABLE_ENTRIES];
+/* Four L2 tables: each one maps 1GB (512 x 2MB entries). */
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l2_identmap[4*L2_PAGETABLE_ENTRIES];
+
+/* Enough page directories to map the Xen text and static data. */
+l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l3_xenmap[L3_PAGETABLE_ENTRIES];
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l2_xenmap[L2_PAGETABLE_ENTRIES];
+
void *alloc_xen_pagetable(void)
{
extern int early_boot;
#include <asm/shared.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
-
#include <public/callback.h>
+asmlinkage void syscall_enter(void);
+asmlinkage void compat_hypercall(void);
+asmlinkage void int80_direct_trap(void);
+
static void print_xen_info(void)
{
char taint_str[TAINT_STRING_MAX_LEN];
return 0;
}
-asmlinkage void syscall_enter(void);
-asmlinkage void compat_hypercall(void);
-asmlinkage void int80_direct_trap(void);
+/*
+ * Hand-assemble a SYSCALL entry trampoline at @stack.  The stub saves
+ * the guest %rsp just below @stack_bottom, switches onto the hypervisor
+ * stack, pushes %r11 (which SYSCALL loads with the guest RFLAGS) and
+ * the guest code selector @cs_seg, then jumps indirectly to
+ * syscall_enter via %r11.  Callers install it twice, once per SYSCALL
+ * entry point (MSR_LSTAR for 64-bit guests, MSR_CSTAR for compat).
+ * Returns the number of bytes written (34), so consecutive trampolines
+ * can be laid out back to back.
+ * NOTE(review): the byte offsets below are hand-encoded x86-64 machine
+ * code; the '- 16' in the %rip-relative displacements targets the save
+ * slot 16 bytes below @stack_bottom -- confirm against the per-CPU
+ * stack layout before changing.
+ */
+static int write_stack_trampoline(
+    char *stack, char *stack_bottom, uint16_t cs_seg)
+{
+    /* movq %rsp, saversp(%rip) */
+    stack[0] = 0x48;
+    stack[1] = 0x89;
+    stack[2] = 0x25;
+    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
+
+    /* leaq saversp(%rip), %rsp */
+    stack[7] = 0x48;
+    stack[8] = 0x8d;
+    stack[9] = 0x25;
+    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
+
+    /* pushq %r11 */
+    stack[14] = 0x41;
+    stack[15] = 0x53;
+
+    /* pushq $<cs_seg> */
+    stack[16] = 0x68;
+    *(u32 *)&stack[17] = cs_seg;
+
+    /* movq $syscall_enter,%r11 */
+    stack[21] = 0x49;
+    stack[22] = 0xbb;
+    *(void **)&stack[23] = (void *)syscall_enter;
+
+    /* jmpq *%r11 */
+    stack[31] = 0x41;
+    stack[32] = 0xff;
+    stack[33] = 0xe3;
+
+    return 34;
+}
+
void __init percpu_traps_init(void)
{
char *stack_bottom, *stack;
/* NMI handler has its own per-CPU 1kB stack. */
init_tss[cpu].ist[1] = (unsigned long)&stack[3072];
- /*
- * Trampoline for SYSCALL entry from long mode.
- */
-
- /* Skip the NMI and DF stacks. */
- stack = &stack[3072];
+ /* Trampoline for SYSCALL entry from long mode. */
+ stack = &stack[3072]; /* Skip the NMI and DF stacks. */
wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+ stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
- /* movq %rsp, saversp(%rip) */
- stack[0] = 0x48;
- stack[1] = 0x89;
- stack[2] = 0x25;
- *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
-
- /* leaq saversp(%rip), %rsp */
- stack[7] = 0x48;
- stack[8] = 0x8d;
- stack[9] = 0x25;
- *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
-
- /* pushq %r11 */
- stack[14] = 0x41;
- stack[15] = 0x53;
-
- /* pushq $FLAT_KERNEL_CS64 */
- stack[16] = 0x68;
- *(u32 *)&stack[17] = FLAT_KERNEL_CS64;
-
- /* jmp syscall_enter */
- stack[21] = 0xe9;
- *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
-
- /*
- * Trampoline for SYSCALL entry from compatibility mode.
- */
-
- /* Skip the long-mode entry trampoline. */
- stack = &stack[26];
+ /* Trampoline for SYSCALL entry from compatibility mode. */
wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+ stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
- /* movq %rsp, saversp(%rip) */
- stack[0] = 0x48;
- stack[1] = 0x89;
- stack[2] = 0x25;
- *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
-
- /* leaq saversp(%rip), %rsp */
- stack[7] = 0x48;
- stack[8] = 0x8d;
- stack[9] = 0x25;
- *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
-
- /* pushq %r11 */
- stack[14] = 0x41;
- stack[15] = 0x53;
-
- /* pushq $FLAT_KERNEL_CS32 */
- stack[16] = 0x68;
- *(u32 *)&stack[17] = FLAT_KERNEL_CS32;
-
- /* jmp syscall_enter */
- stack[21] = 0xe9;
- *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
-
- /*
- * Common SYSCALL parameters.
- */
-
+ /* Common SYSCALL parameters. */
wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
}
}
SECTIONS
{
- . = 0xFFFF830000100000;
+ . = __XEN_VIRT_START + 0x100000;
_start = .;
_stext = .; /* Text and read-only data */
.text : {
}
page = mfn_to_page(mfn);
- if ( unlikely(IS_XEN_HEAP_FRAME(page)) )
+ if ( unlikely(is_xen_heap_frame(page)) )
{
gdprintk(XENLOG_INFO, "gnttab_transfer: xen frame %lx\n",
(unsigned long)gop.mfn);
return free_pages;
}
+#define avail_for_domheap(mfn) \
+ (!allocated_in_map(mfn) && !is_xen_heap_frame(mfn_to_page(mfn)))
void end_boot_allocator(void)
{
unsigned long i;
int curr_free, next_free;
/* Pages that are free now go to the domain sub-allocator. */
- if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
+ if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
map_alloc(first_valid_mfn, 1);
for ( i = first_valid_mfn; i < max_page; i++ )
{
curr_free = next_free;
- next_free = !allocated_in_map(i+1);
+ next_free = avail_for_domheap(i+1);
if ( next_free )
map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
if ( curr_free )
printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
}
+#undef avail_for_domheap
/*
* Scrub all unallocated pages in all heap zones. This function is more
/* Re-check page status with lock held. */
if ( !allocated_in_map(mfn) )
{
- if ( IS_XEN_HEAP_FRAME(mfn_to_page(mfn)) )
+ if ( is_xen_heap_frame(mfn_to_page(mfn)) )
{
p = page_to_virt(mfn_to_page(mfn));
memguard_unguard_range(p, PAGE_SIZE);
* Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
* prevent merging of power-of-two blocks across the zone boundary.
*/
- if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
+ if ( ps && !is_xen_heap_frame(maddr_to_page(ps)-1) )
+ ps += PAGE_SIZE;
+ if ( !is_xen_heap_frame(maddr_to_page(pe)) )
pe -= PAGE_SIZE;
init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
ASSERT(!in_irq());
- if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
+ if ( unlikely(is_xen_heap_frame(pg)) )
{
/* NB. May recursively lock from relinquish_memory(). */
spin_lock_recursive(&d->page_alloc_lock);
/* 29-bit count of references to this frame. */
#define PGC_count_mask ((1UL<<29)-1)
-#define IS_XEN_HEAP_FRAME(_pfn) ((page_to_maddr(_pfn) < xenheap_phys_end) \
- && (page_to_maddr(_pfn) >= xen_pstart))
+#define is_xen_heap_frame(pfn) ((page_to_maddr(pfn) < xenheap_phys_end) \
+ && (page_to_maddr(pfn) >= xen_pstart))
extern void *xen_heap_start;
#define __pickle(a) ((unsigned long)a - (unsigned long)xen_heap_start)
/* 29-bit count of references to this frame. */
#define PGC_count_mask ((1UL<<28)-1)
-#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
+#define is_xen_heap_frame(pfn) (page_to_maddr(pfn) < xenheap_phys_end)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
#define CONFIG_DMA_BITSIZE 32
+#define BOOT_TRAMPOLINE 0x90000
+#define boot_trampoline_pa(sym) \
+ (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
+#define boot_trampoline_va(sym) \
+ (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \
+ BOOT_TRAMPOLINE-__pa(trampoline_start)))
+#ifndef __ASSEMBLY__
+extern char trampoline_start[], trampoline_end[];
+extern char trampoline_realmode_entry[];
+extern unsigned int trampoline_xen_phys_start;
+extern unsigned char trampoline_cpu_started;
+#endif
+
#if defined(__x86_64__)
#define CONFIG_X86_64 1
* 0xffff804000000000 - 0xffff807fffffffff [256GB, 2^38 bytes, PML4:256]
* Reserved for future shared info with the guest OS (GUEST ACCESSIBLE).
* 0xffff808000000000 - 0xffff80ffffffffff [512GB, 2^39 bytes, PML4:257]
- * Read-only guest linear page table (GUEST ACCESSIBLE).
+ * Reserved for future use.
* 0xffff810000000000 - 0xffff817fffffffff [512GB, 2^39 bytes, PML4:258]
* Guest linear page table.
* 0xffff818000000000 - 0xffff81ffffffffff [512GB, 2^39 bytes, PML4:259]
* Compatibility machine-to-phys translation table.
* 0xffff828c40000000 - 0xffff828c7fffffff [1GB, 2^30 bytes, PML4:261]
* High read-only compatibility machine-to-phys translation table.
- * 0xffff828c80000000 - 0xffff82ffffffffff [462GB, PML4:261]
+ * 0xffff828c80000000 - 0xffff828cbfffffff [1GB, 2^30 bytes, PML4:261]
+ * Xen text, static data, bss.
+ * 0xffff828cc0000000 - 0xffff82ffffffffff [461GB, PML4:261]
* Reserved for future use.
* 0xffff830000000000 - 0xffff83ffffffffff [1TB, 2^40 bytes, PML4:262-263]
- * 1:1 direct mapping of all physical memory. Xen and its heap live here.
+ * 1:1 direct mapping of all physical memory.
* 0xffff840000000000 - 0xffff87ffffffffff [4TB, 2^42 bytes, PML4:264-271]
* Reserved for future use.
* 0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
/* Slot 256: read-only guest-accessible machine-to-phys translation table. */
#define RO_MPT_VIRT_START (PML4_ADDR(256))
#define RO_MPT_VIRT_END (RO_MPT_VIRT_START + PML4_ENTRY_BYTES/2)
-
-// current unused?
-#if 0
-/* Slot 257: read-only guest-accessible linear page table. */
-#define RO_LINEAR_PT_VIRT_START (PML4_ADDR(257))
-#define RO_LINEAR_PT_VIRT_END (RO_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
-#endif
-
/* Slot 258: linear page table (guest table). */
#define LINEAR_PT_VIRT_START (PML4_ADDR(258))
#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
/* Slot 261: compatibility machine-to-phys conversion table (1GB). */
#define RDWR_COMPAT_MPT_VIRT_START IOREMAP_VIRT_END
#define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + (1UL << 30))
-/* Slot 261: high read-only compatibility machine-to-phys conversion table (1GB). */
+/* Slot 261: high read-only compat machine-to-phys conversion table (1GB). */
#define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END
#define HIRO_COMPAT_MPT_VIRT_END (HIRO_COMPAT_MPT_VIRT_START + (1UL << 30))
+/* Slot 261: xen text, static data and bss (1GB). */
+#define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END)
+#define XEN_VIRT_END (XEN_VIRT_START + (1UL << 30))
/* Slot 262-263: A direct 1:1 mapping of all of physical memory. */
#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
#endif /* __i386__ */
#ifndef __ASSEMBLY__
-extern unsigned long xenheap_phys_end; /* user-configurable */
+extern unsigned long xen_phys_start, xenheap_phys_start, xenheap_phys_end;
#endif
/* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
#define PageSetSlab(page) ((void)0)
#define PageClearSlab(page) ((void)0)
-#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
+#define is_xen_heap_frame(pfn) ({ \
+ paddr_t maddr = page_to_maddr(pfn); \
+ ((maddr >= xenheap_phys_start) && (maddr < xenheap_phys_end)); \
+})
#if defined(__i386__)
#define pickle_domptr(_d) ((u32)(unsigned long)(_d))
#define mfn_valid(mfn) ((mfn) < max_page)
/* Convert between Xen-heap virtual addresses and machine addresses. */
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-#define virt_to_maddr(va) ((unsigned long)(va)-PAGE_OFFSET)
-#define maddr_to_virt(ma) ((void *)((unsigned long)(ma)+PAGE_OFFSET))
-/* Shorthand versions of the above functions. */
#define __pa(x) (virt_to_maddr(x))
#define __va(x) (maddr_to_virt(x))
#ifndef __ASSEMBLY__
-#if CONFIG_PAGING_LEVELS == 3
-extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
-extern l3_pgentry_t idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES];
-extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES];
-#else
extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
-extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
-#ifdef CONFIG_COMPAT
+#if CONFIG_PAGING_LEVELS == 3
+extern l2_pgentry_t idle_pg_table_l2[
+ ROOT_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES];
+#elif CONFIG_PAGING_LEVELS == 2
+#define idle_pg_table_l2 idle_pg_table
+#elif CONFIG_PAGING_LEVELS == 4
extern l2_pgentry_t *compat_idle_pg_table_l2;
extern unsigned int m2p_compat_vstart;
#endif
-#endif
void paging_init(void);
void setup_idle_pagetable(void);
-#endif
+#endif /* !defined(__ASSEMBLY__) */
#define __pge_off() \
do { \
#define __X86_32_PAGE_H__
#define __PAGE_OFFSET (0xFF000000)
+#define __XEN_VIRT_START __PAGE_OFFSET
+
+#define virt_to_maddr(va) ((unsigned long)(va)-DIRECTMAP_VIRT_START)
+#define maddr_to_virt(ma) ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START))
#define VADDR_BITS 32
#define VADDR_MASK (~0UL)
#define ROOT_PAGETABLE_ENTRIES L4_PAGETABLE_ENTRIES
#define __PAGE_OFFSET (0xFFFF830000000000)
+#define __XEN_VIRT_START (0xFFFF828C80000000)
/* These are architectural limits. Current CPUs support only 40-bit phys. */
#define PADDR_BITS 52
#include <xen/config.h>
#include <asm/types.h>
+/* Physical address where Xen was relocated to. */
+extern unsigned long xen_phys_start;
+
+/*
+ * Translate a Xen virtual address to a machine (physical) address.
+ * Two regions are valid: the 1:1 directmap, where
+ * maddr = va - DIRECTMAP_VIRT_START, and the Xen image mapping, where
+ * maddr is rebased onto xen_phys_start (where Xen was relocated to).
+ */
+static inline unsigned long __virt_to_maddr(unsigned long va)
+{
+    ASSERT(va >= XEN_VIRT_START);
+    ASSERT(va < DIRECTMAP_VIRT_END);
+    ASSERT((va < XEN_VIRT_END) || (va >= DIRECTMAP_VIRT_START));
+    /* '>=', not '>': DIRECTMAP_VIRT_START itself maps machine address 0. */
+    if ( va >= DIRECTMAP_VIRT_START )
+        return va - DIRECTMAP_VIRT_START;
+    return va - XEN_VIRT_START + xen_phys_start;
+}
+#define virt_to_maddr(va) \
+ (__virt_to_maddr((unsigned long)(va)))
+#define maddr_to_virt(ma) \
+ ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START))
+
/* read access (should only be used for debug printk's) */
typedef u64 intpte_t;
#define PRIpte "016lx"